import urllib.request
import urllib.parse
import urllib.request
import re
from lxml import etree

import lxml.html

import Conn


def getCodeAndPrice(url):
    """Return the product code embedded in ``url`` together with its price.

    The URL is split on every ``/`` and ``.`` and the sixth piece is taken as
    the product code. For ``http://product.dangdang.com/1025874094.html`` the
    pieces are ``http:``, ``""``, ``product``, ``dangdang``, ``com``,
    ``1025874094`` and ``html``, so the code is ``"1025874094"``.

    Args:
        url: Product page URL shaped like the example above.

    Returns:
        A ``(code, price)`` tuple, where ``price`` is whatever
        ``doGetPrice(url)`` returns.

    Raises:
        IndexError: If ``url`` splits into fewer than six pieces.
    """
    # Raw string: "\." inside a plain literal is an invalid escape sequence.
    # The result is not named ``str`` so the builtin is not shadowed.
    url_parts = re.split(r"/|\.", url)
    code = url_parts[5]
    price = doGetPrice(url)
    return code, price


def doGetPrice(url):
    """Fetch a product page and return the first number in its price element.

    Args:
        url: Address of the product page. ``None`` or an empty value means
            there is nothing to fetch.

    Returns:
        The first numeric token, as a string (for example ``"12.30"``), found
        in the text nodes directly under the element whose id is
        ``dd-price``. ``None`` is returned when ``url`` is empty or when no
        number is found.
    """
    if url is None or len(url) == 0:
        return None

    request = urllib.request.Request(url)
    # The context manager closes the HTTP response even if read() fails.
    with urllib.request.urlopen(request) as response:
        raw_page = response.read()
    # The page is decoded as GBK; bytes that cannot be decoded are dropped.
    page_text = raw_page.decode("GBK", "ignore")

    tree = etree.HTML(page_text)
    # NOTE(review): guards the case where the parser yields no root element;
    # confirm how lxml treats blank documents.
    if tree is None:
        return None

    number_pattern = re.compile(r'([-+]?[0-9]*\.?[0-9]+)')
    # Search each text node on its own rather than the repr of the whole
    # list: the repr spells characters such as a non-breaking space as
    # "\xa0", and the digits of that escape would be mistaken for the price.
    for text_node in tree.xpath('//*[@id="dd-price"]/text()'):
        found = number_pattern.search(text_node)
        if found:
            return found.group(1)
    return None


def downloadPicture(url,ID):
    """Download the ``largePic`` image of a product page to local storage.

    The page at ``url`` is fetched and decoded as GBK, the ``<img
    id="largePic" ...>`` fragment that points at a ``ddimg.cn`` JPEG is
    located, and the first URL inside that fragment is downloaded to
    ``Conn.getPictureLoc() + str(ID) + ".jpg"``.

    Args:
        url: Address of the product page.
        ID: Identifier used as the base name of the saved file.

    Raises:
        ValueError: If the page contains no matching ``largePic`` image tag.
    """
    img_tag_pattern = r'<img id="largePic"(.*?)http://img(.*?).ddimg.cn/(.*?).jpg"(.*?)'
    url_pattern = r'(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]'

    # The context manager closes the HTTP response even if read() fails.
    with urllib.request.urlopen(url) as response:
        raw_page = response.read()
    page_text = raw_page.decode("GBK", 'ignore')

    tag_match = re.search(img_tag_pattern, page_text, re.S | re.M)
    if tag_match is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("no largePic image found at {!r}".format(url))
    img_fragment = tag_match.group(0)

    # The fragment always contains the literal "http://img", so this search
    # cannot come back empty once the tag pattern has matched.
    url_match = re.search(url_pattern, img_fragment, re.S | re.M)
    pictureUrl = url_match.group(0)

    urllib.request.urlretrieve(pictureUrl, Conn.getPictureLoc() + str(ID) + ".jpg")


if __name__ == "__main__":
    # Manual smoke check. Guarded so that importing this module does not
    # trigger a network request and a print.
    print(getCodeAndPrice("http://product.dangdang.com/1025874094.html"))